{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Setting CUDA_VISIBLE_DEVICES to an empty string before the imports below\n",
    "# hides every GPU from CUDA-based libraries, presumably to keep this run on CPU.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/husein/dev/malaya/malaya/tokenizer.py:208: FutureWarning: Possible nested set at position 3386\n",
      "  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n",
      "/home/husein/dev/malaya/malaya/tokenizer.py:208: FutureWarning: Possible nested set at position 3904\n",
      "  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import malaya\n",
    "from tqdm import tqdm\n",
    "from unidecode import unidecode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "def cleaning(string):\n",
    "    \"\"\"\n",
    "    Turn a text into one string of numbered points, one point per sentence.\n",
    "\n",
    "    Sentences come from `malaya.text.function.split_into_sentences`; when that\n",
    "    yields nothing, the text is split on '.' instead. If the first sentence\n",
    "    starts with '-', every '- ' inside it is removed.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    string: str\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        '1. <sentence> 2. <sentence> ...', or '' when no sentence is found.\n",
    "    \"\"\"\n",
    "    splitted = malaya.text.function.split_into_sentences(string)\n",
    "    if not len(splitted):\n",
    "        # Keep the fallback a list of sentences. Joining it into one string made\n",
    "        # the item assignment below fail and the numbering run over characters.\n",
    "        splitted = [k.strip() for k in string.split('.') if len(k.strip())]\n",
    "    if not len(splitted):\n",
    "        return ''\n",
    "    # startswith is safe even when the first sentence is an empty string.\n",
    "    if splitted[0].startswith('-'):\n",
    "        splitted[0] = splitted[0].replace('- ', '')\n",
    "    return ' '.join(f'{no + 1}. {s}' for no, s in enumerate(splitted))\n",
    "\n",
    "def simple_cleaning(string):\n",
    "    \"\"\"\n",
    "    Pass the text through `unidecode`, turn newlines, '--' and '/' into\n",
    "    spaces, collapse runs of spaces into one and strip both ends.\n",
    "    \"\"\"\n",
    "    ascii_text = unidecode(string)\n",
    "    for token in ('\\n', '--', '/'):\n",
    "        ascii_text = ascii_text.replace(token, ' ')\n",
    "    collapsed = re.sub(r'[ ]+', ' ', ascii_text)\n",
    "    return collapsed.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from malaya.text.vectorizer import SkipGramCountVectorizer\n",
    "\n",
    "# Stopword list shared by both vectorizers below (fetched once).\n",
    "stopwords = malaya.text.function.get_stopwords()\n",
    "\n",
    "# Counts 1- to 4-grams; case is kept because lowercase = False.\n",
    "bow = CountVectorizer(\n",
    "    ngram_range = (1, 4),\n",
    "    stop_words = stopwords,\n",
    "    lowercase = False,\n",
    ")\n",
    "\n",
    "# Same settings with malaya's SkipGramCountVectorizer and skip = 2.\n",
    "skip_bow = SkipGramCountVectorizer(\n",
    "    ngram_range = (1, 4),\n",
    "    stop_words = stopwords,\n",
    "    lowercase = False,\n",
    "    skip = 2\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['berita-bisnes.json.nested.semisupervised',\n",
       " 'berita-dunia.json.nested.semisupervised',\n",
       " 'berita-hiburan.json.nested.semisupervised',\n",
       " 'berita-malaysia.json.nested.semisupervised',\n",
       " 'berita-politik.json.nested.semisupervised',\n",
       " 'berita-sukan.json.nested.semisupervised',\n",
       " 'berita-teknologi.json.nested.semisupervised',\n",
       " 'gaya-hidup.json.nested.semisupervised']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "files = sorted(glob('*.nested.semisupervised'))\n",
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "9622it [00:05, 1790.89it/s]\n",
      "7897it [00:04, 1701.46it/s]\n",
      "9956it [00:06, 1430.57it/s]\n",
      "8578it [00:05, 1662.30it/s]\n",
      "9970it [00:06, 1596.62it/s]\n",
      "9826it [00:05, 1750.73it/s]\n",
      "972it [00:00, 1649.89it/s]\n",
      "5342it [00:04, 1270.26it/s]\n"
     ]
    }
   ],
   "source": [
    "# Cleaned article bodies collected by the loading loop at the end of this cell.\n",
    "accepted = []\n",
    "\n",
    "# Outlet names that `reject` looks for in a record's 'news', 'top-image' and\n",
    "# 'url' fields.\n",
    "malaysian_news = {\n",
    "    'kosmo',\n",
    "    'hmetro',\n",
    "    'malaymail',\n",
    "    'projekmm',\n",
    "    'bharian',\n",
    "    'utusan',\n",
    "    'astroawani',\n",
    "    'themalaysianinsight',\n",
    "    'malaysiakini',\n",
    "    'bernama'\n",
    "}\n",
    "\n",
    "def reject(data):\n",
    "    \"\"\"\n",
    "    Return True when a record shows no sign of coming from a Malaysian source.\n",
    "\n",
    "    A record is kept (False) when its 'news' value is one of `malaysian_news`,\n",
    "    when any of those names occurs in 'top-image' or in 'url', when 'top-image'\n",
    "    contains 'com.my', or when 'language' is 'malay'. Every other record is\n",
    "    rejected (True).\n",
    "\n",
    "    NOTE(review): a later cell defines another `reject` that takes a string and\n",
    "    replaces this one; nothing in the notebook calls this version.\n",
    "    \"\"\"\n",
    "    if data['news'] in malaysian_news:\n",
    "        return False\n",
    "    if any(n in data['top-image'] for n in malaysian_news):\n",
    "        return False\n",
    "    if any(n in data['url'] for n in malaysian_news):\n",
    "        return False\n",
    "    if 'com.my' in data['top-image']:\n",
    "        return False\n",
    "    if data['language'] == 'malay':\n",
    "        return False\n",
    "    # Separate tests for 'Siaran Pers' or '.id' in 'news' are unnecessary: they\n",
    "    # would return True, exactly like this fall-through.\n",
    "    return True\n",
    "\n",
    "for f in files:\n",
    "    # Each line of a file is parsed as one JSON record. The encoding is given\n",
    "    # explicitly so the result does not depend on the platform default.\n",
    "    with open(f, encoding = 'utf-8') as fopen:\n",
    "        for line in tqdm(fopen):\n",
    "            data = json.loads(line)\n",
    "            # The article body is parsed as HTML twice: the second pass runs on\n",
    "            # the text extracted by the first.\n",
    "            soup = BeautifulSoup(data['r']['response']['articleBody'], \"lxml\")\n",
    "            text = BeautifulSoup(soup.text, 'lxml').text\n",
    "            text = re.sub(r'[ ]+', ' ', text).strip()\n",
    "            lowered = text.lower()\n",
    "            # Presumably registration-wall and JavaScript-disabled notices.\n",
    "            if 'kindly register' in lowered or 'disabled in your browser' in lowered:\n",
    "                continue\n",
    "            # Skip bodies with fewer than 30 whitespace-separated words.\n",
    "            if len(text.split()) < 30:\n",
    "                continue\n",
    "            accepted.append(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "62150"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(accepted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'KUALA LUMPUR: Ringgit dijangka diniagakan pada paras 4.38 hingga 4.40 berbanding dolar Amerika Syarikat (AS) minggu depan dengan kecenderungan meningkat terutamanya jika pasaran pertukaran asing terus menghadapi kelembapan dalam kitaran peningkatan kadar faedah AS dengan barisan Kabinet baharu Malaysia menawarkan sokongan selanjutnya.\\n\\r\\nPengarah Urusan SPI Asset Management Stephen Innes berkata ini berdasarkan situasi yang masih tenang dan rancangan China untuk meneruskan pembukaan semula negara itu.\\n\\r\\n\"Satu-satunya cabaran yang saya perhatikan adalah potensi pergolakan awam di China pada akhir minggu ini kerana jika kita mengingat kembali pergolakan di Hong Kong pada beberapa tahun lepas, ia sebenarnya memuncak pada hujung minggu,\" katanya kepada Bernama.\\n \\r\\nBeliau berkata pengumuman barisan Kabinet baharu Malaysia malam tadi akan memberi laluan kepada pembentangan bajet negara manakala pasaran akan kukuh apabila terdapat elemen kestabilan politik.\\n\\r\\nSementara itu, bagi perbandingan mingguan, ringgit mencatat paras yang tidak diraih sejak Jun tahun ini dengan diniagakan pada 4.3835/3925 berbanding dolar AS daripada 4.4795/4890 pada Jumaat.\\n\\r\\nUnit tempatan diniagakan kebanyakannya tinggi berbanding kumpulan mata wang utama.\\n\\r\\nIa meningkat berbanding dolar Singapura kepada 3.2463/2535 berbanding 3.2573/2645 pada Jumaat lepas, naik berbanding euro kepada 4.6136/6231 daripada 4.6551/6650 dan menokok berbanding pound Britain kepada 5.3781/3892 daripada 5.4094/4209.\\n\\r\\nBagaimanapun, ia susut berbanding yen Jepun kepada 3.2698/2768 daripada 3.2118/2188 pada minggu sebelumnya.\\n\\r\\n-- BERNAMA'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accepted[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lowercase pronouns; `headline` and `reject` drop sentences that contain them.\n",
    "rejected = {'saya', 'awak', 'kami', 'dia', 'mereka', 'anda', 'kita'}\n",
    "\n",
    "# Bound `tokenize` method of one malaya Tokenizer instance, reused for every call.\n",
    "tokenize = malaya.tokenizer.Tokenizer().tokenize\n",
    "\n",
    "def headline(string, length = 300):\n",
    "    \"\"\"\n",
    "    Build a lead from the first sentences of `string`.\n",
    "\n",
    "    Sentences whose lowercase tokens include a word from `rejected` are\n",
    "    dropped. The remaining sentences are taken in order until their combined\n",
    "    length (joining spaces not counted) exceeds `length` characters, and are\n",
    "    returned joined with single spaces.\n",
    "    \"\"\"\n",
    "    splitted = malaya.text.function.split_into_sentences(string)\n",
    "    # Compare whole lowercase tokens, as `reject` does. A substring test also\n",
    "    # discards sentences that merely contain words such as 'media' or 'kitaran'.\n",
    "    splitted = [s for s in splitted if not (set(tokenize(s.lower())) & rejected)]\n",
    "    results, total = [], 0\n",
    "    for sentence in splitted:\n",
    "        # Stop only after the limit has been passed, so the sentence that\n",
    "        # crosses it is still included.\n",
    "        if total > length:\n",
    "            break\n",
    "        results.append(sentence)\n",
    "        total += len(sentence)\n",
    "    return ' '.join(results)\n",
    "\n",
    "def reject(string):\n",
    "    \"\"\"\n",
    "    Remove every sentence that contains a word from `rejected`.\n",
    "\n",
    "    Each sentence is lowercased and tokenized; it is kept only when none of\n",
    "    its tokens is in `rejected`. Kept sentences are joined with single spaces.\n",
    "    \"\"\"\n",
    "    kept = []\n",
    "    for sentence in malaya.text.function.split_into_sentences(string):\n",
    "        tokens = set(tokenize(sentence.lower()))\n",
    "        if len(tokens & rejected):\n",
    "            continue\n",
    "        kept.append(sentence)\n",
    "    return ' '.join(kept)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "s = 'IBU negara Korea Selatan, Seoul menduduki tempat ketujuh dalam senarai bandar raya paling mahal untuk didiami pada tahun ini, lapor agensi berita Yonhap.'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'KUALA LUMPUR: Ringgit dijangka diniagakan pada paras 4.38 hingga 4.40 berbanding dolar Amerika Syarikat (AS) minggu depan dengan kecenderungan meningkat terutamanya jika pasaran pertukaran asing terus menghadapi kelembapan dalam kitaran peningkatan kadar faedah AS dengan barisan Kabinet baharu Malaysia menawarkan sokongan selanjutnya. Pengarah Urusan SPI Asset Management Stephen Innes berkata ini berdasarkan situasi yang masih tenang dan rancangan China untuk meneruskan pembukaan semula negara itu. Beliau berkata pengumuman barisan Kabinet baharu Malaysia malam tadi akan memberi laluan kepada pembentangan bajet negara manakala pasaran akan kukuh apabila terdapat elemen kestabilan politik. Sementara itu, bagi perbandingan mingguan, ringgit mencatat paras yang tidak diraih sejak Jun tahun ini dengan diniagakan pada 4.3835/3925 berbanding dolar AS daripada 4.4795/4890 pada Jumaat. Unit tempatan diniagakan kebanyakannya tinggi berbanding kumpulan mata wang utama. Ia meningkat berbanding dolar Singapura kepada 3.2463/2535 berbanding 3.2573/2645 pada Jumaat lepas, naik berbanding euro kepada 4.6136/6231 daripada 4.6551/6650 dan menokok berbanding pound Britain kepada 5.3781/3892 daripada 5.4094/4209. Bagaimanapun, ia susut berbanding yen Jepun kepada 3.2698/2768 daripada 3.2118/2188 pada minggu sebelumnya. -- BERNAMA.'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reject(accepted[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['kemampanan sektor TriBE Assembly',\n",
       "  'TriBE Assembly Jumaat Sarawak',\n",
       "  'legasi kemampanan sektor TriBE',\n",
       "  'Industri Kreatif Seni Persembahan',\n",
       "  'alam Kementerian Pelancongan Industri',\n",
       "  'Pelancongan Industri Kreatif Seni',\n",
       "  'Kementerian Pelancongan Industri Kreatif'],\n",
       " ['Ketua Pegawai Eksekutif BESarawak Amelia Roziman',\n",
       "  'Seni Persembahan Sarawak pelan strategik jangka',\n",
       "  'Seri Abdul Karim Rahman Hamzah',\n",
       "  '3rd Business Events Tribal Meet',\n",
       "  'mencapai Strategi Pembangunan Pasca COVID-19',\n",
       "  'penggiat acara perniagaan global berkembang'])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RAKE keyphrases for one article through the `bow` vectorizer; top_k is drawn\n",
    "# at random from 3 to 10.\n",
    "keywords = [\n",
    "    item[1]\n",
    "    for item in malaya.keyword.extractive.rake(\n",
    "        accepted[2], vectorizer = bow, top_k = random.randint(3, 10)\n",
    "    )\n",
    "]\n",
    "# Same article through rake without a custom vectorizer and with atleast = 1.\n",
    "keywords_rake = [\n",
    "    item[1]\n",
    "    for item in malaya.keyword.extractive.rake(\n",
    "        accepted[2], atleast = 1, top_k = random.randint(3, 10)\n",
    "    )\n",
    "]\n",
    "keywords, keywords_rake"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lowercase words that disqualify a keyphrase when it contains one of them:\n",
    "# English and Malay month names and abbreviations, followed by a few\n",
    "# news-outlet names and 'com'.\n",
    "months = {\n",
    "    'january',\n",
    "    'jan',\n",
    "    'januari',\n",
    "    'february',\n",
    "    'feb',\n",
    "    'februari',\n",
    "    'march',\n",
    "    'mac',\n",
    "    'april',\n",
    "    'apr',\n",
    "    'may',\n",
    "    'mei',\n",
    "    'june',\n",
    "    'jun',\n",
    "    'july',\n",
    "    'julai',\n",
    "    'august',\n",
    "    'ogos',\n",
    "    'aug',\n",
    "    'september',\n",
    "    'sep',\n",
    "    'october',\n",
    "    'oktober',\n",
    "    'oct',\n",
    "    'november',\n",
    "    'nov',\n",
    "    'december',\n",
    "    'disember',\n",
    "    'dec',\n",
    "    'utusan',\n",
    "    'malaysiakini',\n",
    "    'astroawani',\n",
    "    'bernama',\n",
    "    'com',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from malaya.text.rouge import postprocess_summary, filter_rouge, _get_word_ngrams, _rouge_clean, cal_rouge\n",
    "from malaya.text.function import split_into_sentences\n",
    "\n",
    "def filtering_rouge(summary, contents, start = 0.15, increment = 0.05, break_at = 120, n = 1):\n",
    "    \"\"\"\n",
    "    Apply `filter_rouge` to `contents` repeatedly until it has at most\n",
    "    `break_at` whitespace-separated words.\n",
    "\n",
    "    The first pass uses threshold `start`; every further pass raises the\n",
    "    threshold by `increment`. `summary` and `n` are forwarded unchanged.\n",
    "    Text that is already short enough is returned as is.\n",
    "\n",
    "    NOTE(review): the loop ends only once `filter_rouge` returns a short enough\n",
    "    text - confirm that it always shrinks the text as the threshold grows.\n",
    "    \"\"\"\n",
    "    filtered = contents\n",
    "    step = 0\n",
    "    while len(filtered.split()) > break_at:\n",
    "        threshold = start + increment * step\n",
    "        filtered = filter_rouge(summary, filtered, n = n, threshold = threshold)\n",
    "        step += 1\n",
    "    return filtered"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['kemampanan sektor TriBE Assembly',\n",
       "  'TriBE Assembly Jumaat Sarawak',\n",
       "  'legasi kemampanan sektor TriBE',\n",
       "  'Industri Kreatif Seni Persembahan',\n",
       "  'alam Kementerian Pelancongan Industri',\n",
       "  'Pelancongan Industri Kreatif Seni',\n",
       "  'Kementerian Pelancongan Industri Kreatif'],)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "i = 2\n",
    "keywords = malaya.keyword.extractive.rake(accepted[i], vectorizer = bow, top_k = 50)\n",
    "# Keep phrases with more than one word, more than 10 characters and no word\n",
    "# found in `months`.\n",
    "keywords = [\n",
    "    simple_cleaning(k[1])\n",
    "    for k in keywords\n",
    "    if len(k[1].split()) > 1\n",
    "    and len(k[1]) > 10\n",
    "    and len(set(k[1].lower().replace('-', '').split()) & months) == 0\n",
    "]\n",
    "# Cut the list at a random length between 3 and 10.\n",
    "keywords = keywords[:random.randint(3, 10)]\n",
    "(keywords,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|█████████████▌                       | 22841/62150 [04:20<06:38, 98.65it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 37%|█████████████▍                      | 23281/62150 [04:24<04:56, 130.89it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 38%|█████████████▌                      | 23340/62150 [04:25<05:11, 124.58it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 39%|█████████████▉                      | 24038/62150 [04:30<05:12, 121.97it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████████████████████▌             | 38921/62150 [07:23<03:41, 104.77it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 75%|███████████████████████████         | 46699/62150 [08:52<02:28, 104.11it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 77%|███████████████████████████▊        | 48062/62150 [09:07<02:20, 100.14it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 83%|██████████████████████████████▌      | 51350/62150 [09:42<02:00, 89.36it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████▉| 62100/62150 [11:49<00:00, 122.74it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "empty vocabulary; perhaps the documents only contain stop words\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████| 62150/62150 [11:49<00:00, 87.59it/s]\n"
     ]
    }
   ],
   "source": [
    "def extract_keywords(text, **rake_kwargs):\n",
    "    \"\"\"\n",
    "    Run RAKE on `text` and return a random-length list of cleaned keyphrases.\n",
    "\n",
    "    Up to 100 candidates are requested. A phrase is kept when it has more than\n",
    "    one word, more than 10 characters and no word found in `months`; kept\n",
    "    phrases go through `simple_cleaning`. The list is then cut at a random\n",
    "    length between 3 and 10, keeping the order `rake` returned. Extra keyword\n",
    "    arguments (for example `vectorizer`) are passed straight to `rake`.\n",
    "    \"\"\"\n",
    "    candidates = malaya.keyword.extractive.rake(text, top_k = 100, **rake_kwargs)\n",
    "    cleaned = [\n",
    "        simple_cleaning(k[1])\n",
    "        for k in candidates\n",
    "        if len(k[1].split()) > 1\n",
    "        and len(k[1]) > 10\n",
    "        and len(set(k[1].lower().replace('-', '').split()) & months) == 0\n",
    "    ]\n",
    "    return cleaned[:random.randint(3, 10)]\n",
    "\n",
    "# Fixed seed so the random keyword counts below are the same on every run.\n",
    "SEED = 1234\n",
    "random.seed(SEED)\n",
    "\n",
    "# RAKE runs twice per article: once through the `bow` vectorizer and once with\n",
    "# its default settings.\n",
    "rake_variants = [{'vectorizer': bow}, {}]\n",
    "\n",
    "before, after = [], []\n",
    "for i in tqdm(range(len(accepted))):\n",
    "    h = reject(accepted[i])\n",
    "    for rake_kwargs in rake_variants:\n",
    "        # Each variant is guarded on its own so that a failure in one (for\n",
    "        # example an empty vocabulary) does not discard the other one's pair.\n",
    "        try:\n",
    "            keywords = extract_keywords(h, **rake_kwargs)\n",
    "            if not keywords:\n",
    "                # No usable keyphrase, so there is nothing to pair the text with.\n",
    "                continue\n",
    "            r = filtering_rouge('. '.join(keywords), h, n = 2, start = 0.01)\n",
    "            # Keep the pair only when more than 20 words survive the filtering.\n",
    "            if len(r.split()) > 20:\n",
    "                before.append(keywords)\n",
    "                after.append(r)\n",
    "        except Exception as e:\n",
    "            # Best effort: report which article failed and carry on.\n",
    "            print(f'article {i}: {e}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(121822, 121822)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(before), len(after)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['ringgit diniagakan kebanyakannya rendah',\n",
       "  'rendah berbanding dolar AS',\n",
       "  'diniagakan kebanyakannya rendah berbanding',\n",
       "  'kebanyakannya rendah berbanding kumpulan',\n",
       "  'rendah berbanding kumpulan mata',\n",
       "  'Management Stephen Innes ringgit',\n",
       "  'Sementara ringgit diniagakan kebanyakannya',\n",
       "  'berikutan aktiviti pengambilan untung'],\n",
       " 'KUALA LUMPUR: Ringgit kembali ditutup rendah hari ini berbanding dolar AS selepas kenaikan minggu lepas berikutan aktiviti pengambilan untung, kata seorang peniaga. Pada 6 petang, unit tempatan terus susut kepada 4.5060/5130 berbanding dolar AS daripada paras penutup 4.4795/4890 pada Jumaat minggu lepas. Pengarah Urusan SPI Asset Management Stephen Innes berkata ringgit kembali menyusut hari ini apabila kes COVID-19 terus melonjak di China, yang memberi kesan kepada pembukaan semula ekonomi serta membawa kepada prospek lebih lemah. Sementara itu, ringgit diniagakan kebanyakannya rendah berbanding kumpulan mata wang utama lain.')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "before[101], after[101]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the pairs as one JSON object with two parallel lists: 'before' holds\n",
    "# the keyphrase lists and 'after' the matching filtered texts.\n",
    "with open('keywords-headline-astroawani.json', 'w') as fopen:\n",
    "    json.dump({'before': before, 'after': after}, fopen)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
